---
title: "Untitled"
author: "Renae L. Shrum"
output: word_document
---

# Old Faithful Data Analysis

## From R's dataset `faithful`

```{r, comment=""}
# Columns are referenced explicitly (faithful$..., data = faithful) rather
# than via attach(), so nothing is added to the search path.
data(faithful)
library(stargazer)
stargazer(
  faithful,
  title = "Summary Statistics for Old Faithful",
  type = "text",
  flip = TRUE
)
```

### Summary Statistics

```{r}
summary(faithful)
```

### Summary Statistics (without showing the code and '##')

```{r, echo=FALSE, comment=""}
summary(faithful)
```

### Graphs of eruptions and waiting:

```{r, echo=FALSE}
# xlab is set explicitly so the axis shows the column name, not "faithful$...".
hist(faithful$waiting, main = "Histogram of Waiting", xlab = "waiting")
hist(faithful$eruptions, main = "Histogram of Eruptions", xlab = "eruptions")
boxplot(faithful$eruptions, main = "Boxplot of eruptions")
boxplot(faithful$waiting, main = "Boxplot of waiting")
```

### Regression model x=eruptions y=waiting

```{r, comment=""}
# install car package if needed
# load the car package (used for durbinWatsonTest, qqPlot, influencePlot)
library(car)

# The Durbin-Watson p-value and the qqPlot envelope are obtained by
# simulation; fixing the seed makes the knitted report reproducible.
set.seed(123)

faith.fit <- lm(waiting ~ eruptions, data = faithful)
summary(faith.fit)

# diagnostic plots
res <- residuals(faith.fit)
pred <- fitted.values(faith.fit)

# mean of residuals = 0
hist(res)

# homogeneous (constant) variance
plot(pred, res, pch = 17, main = "Predicted vs. Residuals")
abline(h = 0)

# independence of residuals: residuals in observation order
obs_order <- seq_along(res)
plot(obs_order, res, type = "l", xlab = "order")
abline(h = 0)

# another way to check independence of residuals
durbinWatsonTest(faith.fit)

# QQplot (normal probability plot)
qqnorm(res)
qqline(res)

# QQ plot of the model's residuals with a simulated envelope (car)
qqPlot(faith.fit, simulate = TRUE)

# Leverage (outliers, influential points)
n_obs <- length(faith.fit$residuals)
n_coef <- length(faith.fit$coefficients)

# look at hat values; reference line drawn at 2 * p / n
hat_cutoff <- 2 * n_coef / n_obs
plot(hatvalues(faith.fit))
abline(h = hat_cutoff, col = 2)

# influence
# look at the distance between the regression coefficients with the ith
# observation present or absent; reference line drawn at 4 / (n - p)
plot(cooks.distance(faith.fit))
abline(h = 4 / (n_obs - n_coef), col = 2)

# x-axis limit for the influence plot: 1.5 times the hat-value cut-off
bound <- 1.5 * hat_cutoff

# Influence plot in car-package combines the studentized residuals,
# hat values and Cook's distances
# areas of the circles correspond to Cook's distances
influencePlot(faith.fit, xlim = c(0, bound), ylim = c(-5, 5))
```

### Hypothesis test: H0: mu(waiting)=68 vs. Ha: mu(waiting)!=68

```{r, comment=""}
t.test(faithful$waiting, mu = 68, alternative = "two.sided")
```